{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This dataset has 100 samples and 5 features\n",
      "[[ 0.  0.  1.  1.  1.]\n",
      " [ 1.  1.  0.  1.  0.]\n",
      " [ 1.  0.  1.  1.  0.]\n",
      " [ 0.  0.  1.  1.  1.]\n",
      " [ 0.  1.  0.  0.  1.]]\n",
      "[((2, 3), 25),\n",
      " ((2, 4), 27),\n",
      " ((3, 2), 25),\n",
      " ((3, 4), 21),\n",
      " ((4, 2), 27),\n",
      " ((4, 3), 21),\n",
      " ((0, 1), 14),\n",
      " ((0, 3), 5),\n",
      " ((1, 0), 14),\n",
      " ((1, 3), 9),\n",
      " ((3, 0), 5),\n",
      " ((3, 1), 9),\n",
      " ((0, 2), 4),\n",
      " ((2, 0), 4),\n",
      " ((1, 4), 19),\n",
      " ((4, 1), 19),\n",
      " ((0, 4), 17),\n",
      " ((4, 0), 17),\n",
      " ((1, 2), 7),\n",
      " ((2, 1), 7)]\n",
      "Rule #1\n",
      "Rule: If a person buys cheese they will also buy bananas\n",
      " - Confidence: 0.659\n",
      " - Support: 27\n",
      "Rule #2\n",
      "Rule: If a person buys bananas they will also buy cheese\n",
      " - Confidence: 0.458\n",
      " - Support: 27\n",
      "Rule #3\n",
      "Rule: If a person buys cheese they will also buy apples\n",
      " - Confidence: 0.610\n",
      " - Support: 25\n",
      "Rule #4\n",
      "Rule: If a person buys apples they will also buy cheese\n",
      " - Confidence: 0.694\n",
      " - Support: 25\n",
      "Rule #5\n",
      "Rule: If a person buys apples they will also buy bananas\n",
      " - Confidence: 0.583\n",
      " - Support: 21\n",
      "Rule #1\n",
      "Rule: If a person buys apples they will also buy cheese\n",
      " - Confidence: 0.694\n",
      " - Support: 25\n",
      "Rule #2\n",
      "Rule: If a person buys cheese they will also buy bananas\n",
      " - Confidence: 0.659\n",
      " - Support: 27\n",
      "Rule #3\n",
      "Rule: If a person buys bread they will also buy bananas\n",
      " - Confidence: 0.630\n",
      " - Support: 17\n",
      "Rule #4\n",
      "Rule: If a person buys cheese they will also buy apples\n",
      " - Confidence: 0.610\n",
      " - Support: 25\n",
      "Rule #5\n",
      "Rule: If a person buys apples they will also buy bananas\n",
      " - Confidence: 0.583\n",
      " - Support: 21\n"
     ]
    }
   ],
   "source": [
    "import numpy as np #导入numpy 包\n",
    "dataset_filename = \"affinity_dataset.txt\" #从文件中加载数据\n",
    "X = np.loadtxt(dataset_filename) #读取数据\n",
    "n_samples, n_features = X.shape #numpy 包中 shape 根据矩阵的获得一个元组\n",
    "print(\"This dataset has {0} samples and {1} features\".format(n_samples, n_features))\n",
    "print(X[:5]) #查看前5行\n",
    "features = [\"bread\", \"milk\", \"cheese\", \"apples\", \"bananas\"]\n",
    "rule_valid = 0 #记录符合规则数\n",
    "rule_invalid = 0 #记录不符合规则数\n",
    "from collections import defaultdict # 导入 默认字典\n",
    "valid_rules = defaultdict(int)\n",
    "invalid_rules = defaultdict(int)\n",
    "num_occurences = defaultdict(int)\n",
    "for sample in X:\n",
    "    for premise in range(n_features):\n",
    "        if sample[premise] == 0: continue\n",
    "        num_occurences[premise] += 1 # 符合guiz\n",
    "        for conclusion in range(n_features):\n",
    "            if premise == conclusion:\n",
    "                continue\n",
    "            if sample[conclusion] == 1:\n",
    "                valid_rules[(premise, conclusion)] += 1\n",
    "            else:\n",
    "                invalid_rules[(premise, conclusion)] += 1\n",
    "support = valid_rules\n",
    "confidence = defaultdict(float)\n",
    "for premise, conclusion in valid_rules.keys():\n",
    "    confidence[(premise, conclusion)] = valid_rules[(premise, conclusion)] / num_occurences[premise]\n",
    "# 定义打印 规则函数\n",
    "def print_rule(premise, conclusion, support, confidence, features):\n",
    "    premise_name = features[premise]\n",
    "    conclusion_name = features[conclusion]\n",
    "    print(\"Rule: If a person buys {0} they will also buy {1}\".format(premise_name, conclusion_name))\n",
    "    print(\" - Confidence: {0:.3f}\".format(confidence[(premise, conclusion)]))\n",
    "    print(\" - Support: {0}\".format(support[(premise, conclusion)]))\n",
    "from pprint import pprint # 支持排序\n",
    "pprint(list(support.items()))\n",
    "from operator import itemgetter\n",
    "sorted_support = sorted(support.items(), key=itemgetter(1), reverse =True) # reverse= True  倒序\n",
    "for index in range(5):\n",
    "    print(\"Rule #{0}\".format(index + 1))\n",
    "    (premise, conclusion) = sorted_support[index][0]\n",
    "    print_rule(premise, conclusion, support, confidence, features)\n",
    "    \n",
    "sorted_confidence = sorted(confidence.items(), key=itemgetter(1), reverse=True)\n",
    "for index in range(5):\n",
    "    print(\"Rule #{0}\".format(index + 1))\n",
    "    (premise, conclusion) = sorted_confidence[index][0]\n",
    "    print_rule(premise, conclusion, support, confidence, features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
